In [1]:
import datetime
import math

import folium
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from folium.plugins import HeatMap
from scipy import stats
In [2]:
# Per-month complaint counts. Later cells filter this frame on its
# "Complaint Type" column and read the 'monthSize' column.
filename1 = 'csv/311-ac-monthGroupList.csv'
df = pd.read_csv(filepath_or_buffer=filename1)
In [3]:
# Complaint-type summary table; only its first 50 rows are kept.
# NOTE(review): presumably the file is already ordered by descending 'size',
# which would make these the 50 most frequent types — confirm.
filename2 = 'csv/311-All-Cmplaint Type-Groupby.csv'
dfT50 = pd.read_csv(filename2).iloc[:50]

# Same 50 rows, re-ordered by ascending 'size'.
rdfT50 = dfT50.sort_values(by='size')
In [4]:
# Complaint table used by the map cells, which read its 'Complaint Type',
# 'Latitude' and 'Longitude' columns.
# NOTE(review): this file is read from the working directory, unlike the two
# CSVs above that live under 'csv/' — confirm the path is intentional.
df_whole = pd.read_csv(filepath_or_buffer='311-All-Concise.csv')
In [5]:
# The five noise-related complaint types studied in this notebook.
dfNoise = [
    'Noise - Residential',
    'Noise - Street/Sidewalk',
    'Noise - Commercial',
    'Noise - Vehicle',
    'Noise',
]

# Borough names, upper-cased.
dist = [
    'BRONX',
    'BROOKLYN',
    'MANHATTAN',
    'QUEENS',
    'STATEN ISLAND',
]
In [6]:
# Scatter plots (with a fitted regression line) of tree-related complaint
# counts against three noise categories, on a 2x3 grid:
#   row 0 -> x axis is 'New Tree Request'
#   row 1 -> x axis is 'Overgrown Tree/Branches'
# The noise list repeats its three entries so that each row gets the same set.
tree_noise = [ 'Noise - Street/Sidewalk' ,'Noise - Vehicle','Noise','Noise - Street/Sidewalk' ,'Noise - Vehicle','Noise']
new_tree = 'New Tree Request'
over_tree = 'Overgrown Tree/Branches'

f1, p1 = plt.subplots(2, 3, figsize=(15, 10))
n_cols = p1.shape[1]

# p1.flat walks the grid row by row, so no hand-maintained row/column
# counters are needed.
for panel, (ax, noise_type) in enumerate(zip(p1.flat, tree_noise)):
    tree_type = new_tree if panel < n_cols else over_tree

    tree_counts = df.loc[df["Complaint Type"] == tree_type, 'monthSize']
    noise_counts = df.loc[df["Complaint Type"] == noise_type, 'monthSize']

    # Trim both series to the shorter length so they can be paired.
    # NOTE(review): rows are paired by position, not by month; if either type
    # has missing months the pairs may be misaligned — confirm against the CSV.
    n_obs = min(len(tree_counts), len(noise_counts))
    x = tree_counts.head(n_obs)
    y = noise_counts.head(n_obs)

    slope, intercept, r_value, p_value, std_err = stats.linregress(x, y)
    ax.scatter(x, y, marker='o')
    ax.plot(x, intercept + slope * x, 'r',
            label='slope ~ R: ' + str(round(slope, 2)) + ' ~ ' + str(round(r_value, 2)))
    ax.set_xlabel(tree_type)
    ax.set_ylabel(noise_type)
    ax.legend(loc="upper left")

Scatter plot analysis

After analyzing the relationship between the 5 noise categories and the other 49 highest-ranked complaint types, the results show a positive correlation between Illegal Parking and 'Noise - Residential', 'Noise - Street/Sidewalk', 'Noise - Commercial' and 'Noise'.

Is this a coincidence, or is there indeed a real-world connection between them? We use folium to draw maps and explore whether there is a spatial connection between them.

Each point in these plots pairs the monthly totals of two complaint types. For example, in the first plot, the point (7807,2559) means that in August 2011 there were 7807 cases of Noise - Residential and 2259 cases of Illegal Parking.

In [7]:
# Scatter plots (with a fitted regression line) of four noise categories
# (x axis) against 'Illegal Parking' (y axis), on a 2x2 grid.
car_noise = ['Noise - Residential' , 'Noise - Street/Sidewalk' ,'Noise - Commercial' ,'Noise']
park = 'Illegal Parking'

f1, p1 = plt.subplots(2, 2, figsize=(15, 10))

# The parking series is the same for every panel, so filter it once.
park_counts = df.loc[df["Complaint Type"] == park, 'monthSize']

# p1.flat walks the grid row by row, so no hand-maintained row/column
# counters are needed.
for ax, noise_type in zip(p1.flat, car_noise):
    noise_counts = df.loc[df["Complaint Type"] == noise_type, 'monthSize']

    # Trim both series to the shorter length so they can be paired.
    # NOTE(review): rows are paired by position, not by month; if either type
    # has missing months the pairs may be misaligned — confirm against the CSV.
    n_obs = min(len(noise_counts), len(park_counts))
    x = noise_counts.head(n_obs)
    y = park_counts.head(n_obs)

    slope, intercept, r_value, p_value, std_err = stats.linregress(x, y)
    ax.scatter(x, y, marker='o')
    ax.plot(x, intercept + slope * x, 'r',
            label='slope ~ R: ' + str(round(slope, 2)) + ' ~ ' + str(round(r_value, 2)))
    ax.set_xlabel(noise_type)
    ax.set_ylabel(park)
    ax.legend(loc="upper left")

Map structure introduction

In these four folium maps, we grab 500 random GPS sample points from each category during the years 2010-2020. The background heat map is always the Illegal Parking distribution, and the points are samples from the other four noise categories.

Illegal Parking and Noise - Residential

In this map, the areas where noise complaints concentrate lie near large numbers of illegal parking complaints. This is in line with real-world conditions: it is likely that the noisy areas are densely populated and parking spaces are scarce. This clustering is also relatively evenly spread, in line with the characteristics of residential areas.

In [8]:
# Relationship between Illegal Parking and Noise - Residential:
# sampled noise complaints (blue markers) drawn over an Illegal Parking heat map.
borough_map = 'Borough Boundaries.geojson'
n_points = 500  # sample size per complaint type

# NOTE(review): recent folium releases may reject the "Stamen Toner" tile name
# unless a tile URL and attribution are given — confirm against the installed
# version.
map_hooray_scatter = folium.Map(location=[40.7128, -74.0060], tiles="Stamen Toner", zoom_start=10.5)

# Rows missing either coordinate are dropped *before* sampling, so the sample
# keeps its intended size and no NaN longitude reaches folium. The sample is
# capped at the rows available and seeded so the map is reproducible.
parking_geo = df_whole[df_whole['Complaint Type'] == 'Illegal Parking'].dropna(subset=['Latitude', 'Longitude'])
selected = parking_geo.sample(n=min(n_points, len(parking_geo)), random_state=0)
cmlist = selected[['Latitude', 'Longitude']].values.tolist()

noise_geo = df_whole[df_whole['Complaint Type'] == 'Noise - Residential'].dropna(subset=['Latitude', 'Longitude'])
selected_NR = noise_geo.sample(n=min(n_points, len(noise_geo)), random_state=0)
cmlist_NR = selected_NR[['Latitude', 'Longitude']].values.tolist()

folium.GeoJson(borough_map).add_to(map_hooray_scatter)

for point in cmlist_NR:
    folium.CircleMarker(point, width=30, height=30, radius=3.5, weight=2.0, color='#0000CC',
                        fill_color='#0066FF', opacity=0.75, fill_opacity=0.5).add_to(map_hooray_scatter)
HeatMap(cmlist, max_zoom=1000000, radius=20).add_to(map_hooray_scatter)
map_hooray_scatter
Out[8]:

Illegal Parking and Noise - Street/Sidewalk

In the vicinity of illegal parking there is a lot of noise. This noise is clearly more concentrated in the middle of the illegal parking area, which may be a street or a commercial area where crowds gather.

In [9]:
# Relationship between Illegal Parking and Noise - Street/Sidewalk:
# sampled noise complaints (blue markers) drawn over an Illegal Parking heat map.
borough_map = 'Borough Boundaries.geojson'
n_points = 500  # sample size per complaint type

# NOTE(review): recent folium releases may reject the "Stamen Toner" tile name
# unless a tile URL and attribution are given — confirm against the installed
# version.
map_hooray_scatter = folium.Map(location=[40.7128, -74.0060], tiles="Stamen Toner", zoom_start=10.5)

# Rows missing either coordinate are dropped *before* sampling, so the sample
# keeps its intended size and no NaN longitude reaches folium. The sample is
# capped at the rows available and seeded so the map is reproducible.
parking_geo = df_whole[df_whole['Complaint Type'] == 'Illegal Parking'].dropna(subset=['Latitude', 'Longitude'])
selected = parking_geo.sample(n=min(n_points, len(parking_geo)), random_state=0)
cmlist = selected[['Latitude', 'Longitude']].values.tolist()

noise_geo = df_whole[df_whole['Complaint Type'] == 'Noise - Street/Sidewalk'].dropna(subset=['Latitude', 'Longitude'])
selected_NR = noise_geo.sample(n=min(n_points, len(noise_geo)), random_state=0)
cmlist_NR = selected_NR[['Latitude', 'Longitude']].values.tolist()

folium.GeoJson(borough_map).add_to(map_hooray_scatter)

for point in cmlist_NR:
    folium.CircleMarker(point, width=30, height=30, radius=3.5, weight=2.0, color='#0000CC',
                        fill_color='#0066FF', opacity=0.75, fill_opacity=0.5).add_to(map_hooray_scatter)
HeatMap(cmlist, max_zoom=1000000, radius=20).add_to(map_hooray_scatter)
map_hooray_scatter
Out[9]:

Illegal Parking and Noise - Commercial

Noise - Commercial is more concentrated than Noise - Street/Sidewalk; the range of commercial activity may be relatively fixed, not as widely distributed as the streets. Areas where Noise - Commercial is more concentrated often coincide with dense parking spots. This suggests that commercial noise and the use of transportation may be closely related.

In [19]:
# Relationship between Illegal Parking and Noise - Commercial:
# sampled noise complaints (blue markers) drawn over an Illegal Parking heat map.
borough_map = 'Borough Boundaries.geojson'
n_points = 500  # sample size per complaint type

# NOTE(review): recent folium releases may reject the "Stamen Toner" tile name
# unless a tile URL and attribution are given — confirm against the installed
# version.
map_hooray_scatter = folium.Map(location=[40.7128, -74.0060], tiles="Stamen Toner", zoom_start=10.5)

# Rows missing either coordinate are dropped *before* sampling, so the sample
# keeps its intended size and no NaN longitude reaches folium. The sample is
# capped at the rows available and seeded so the map is reproducible.
parking_geo = df_whole[df_whole['Complaint Type'] == 'Illegal Parking'].dropna(subset=['Latitude', 'Longitude'])
selected = parking_geo.sample(n=min(n_points, len(parking_geo)), random_state=0)
cmlist = selected[['Latitude', 'Longitude']].values.tolist()

noise_geo = df_whole[df_whole['Complaint Type'] == 'Noise - Commercial'].dropna(subset=['Latitude', 'Longitude'])
selected_NR = noise_geo.sample(n=min(n_points, len(noise_geo)), random_state=0)
cmlist_NR = selected_NR[['Latitude', 'Longitude']].values.tolist()

folium.GeoJson(borough_map).add_to(map_hooray_scatter)

for point in cmlist_NR:
    folium.CircleMarker(point, width=30, height=30, radius=3.5, weight=2.0, color='#0000CC',
                        fill_color='#0066FF', opacity=0.75, fill_opacity=0.5).add_to(map_hooray_scatter)
HeatMap(cmlist, max_zoom=1000000, radius=20).add_to(map_hooray_scatter)
map_hooray_scatter
Out[19]:

Illegal Parking and Noise

From the figure, we can see that the noise is heavily concentrated in one specific area: Manhattan, the most prosperous borough. This indicates that noise is related to specific areas. The reasons behind it still need further exploration.

In [20]:
# Relationship between Illegal Parking and Noise:
# sampled noise complaints (blue markers) drawn over an Illegal Parking heat map.
borough_map = 'Borough Boundaries.geojson'
n_points = 500  # sample size per complaint type

# NOTE(review): recent folium releases may reject the "Stamen Toner" tile name
# unless a tile URL and attribution are given — confirm against the installed
# version.
map_hooray_scatter = folium.Map(location=[40.7128, -74.0060], tiles="Stamen Toner", zoom_start=10.5)

# Rows missing either coordinate are dropped *before* sampling, so the sample
# keeps its intended size and no NaN longitude reaches folium. The sample is
# capped at the rows available and seeded so the map is reproducible.
parking_geo = df_whole[df_whole['Complaint Type'] == 'Illegal Parking'].dropna(subset=['Latitude', 'Longitude'])
selected = parking_geo.sample(n=min(n_points, len(parking_geo)), random_state=0)
cmlist = selected[['Latitude', 'Longitude']].values.tolist()

noise_geo = df_whole[df_whole['Complaint Type'] == 'Noise'].dropna(subset=['Latitude', 'Longitude'])
selected_NR = noise_geo.sample(n=min(n_points, len(noise_geo)), random_state=0)
cmlist_NR = selected_NR[['Latitude', 'Longitude']].values.tolist()

folium.GeoJson(borough_map).add_to(map_hooray_scatter)

for point in cmlist_NR:
    folium.CircleMarker(point, width=30, height=30, radius=3.5, weight=2.0, color='#0000CC',
                        fill_color='#0066FF', opacity=0.75, fill_opacity=0.5).add_to(map_hooray_scatter)
HeatMap(cmlist, max_zoom=1000000, radius=20).add_to(map_hooray_scatter)
map_hooray_scatter
Out[20]:

Illegal Parking and Noise - Residential are both drawn as points on the same map. Blue is Illegal Parking; red is Noise - Residential.

In [12]:
# Illegal Parking (blue markers) and Noise - Residential (red markers) drawn
# as points on the same map.
borough_map = 'Borough Boundaries.geojson'
n_points = 200  # sample size per complaint type

# NOTE(review): recent folium releases may reject the "Stamen Toner" tile name
# unless a tile URL and attribution are given — confirm against the installed
# version.
map_hooray_scatter = folium.Map(location=[40.7128, -74.0060], tiles="Stamen Toner", zoom_start=10.5)

# Rows missing either coordinate are dropped *before* sampling, so the sample
# keeps its intended size and no NaN longitude reaches folium. The sample is
# capped at the rows available and seeded so the map is reproducible.
parking_geo = df_whole[df_whole['Complaint Type'] == 'Illegal Parking'].dropna(subset=['Latitude', 'Longitude'])
selected = parking_geo.sample(n=min(n_points, len(parking_geo)), random_state=0)
cmlist = selected[['Latitude', 'Longitude']].values.tolist()

noise_geo = df_whole[df_whole['Complaint Type'] == 'Noise - Residential'].dropna(subset=['Latitude', 'Longitude'])
selected_NR = noise_geo.sample(n=min(n_points, len(noise_geo)), random_state=0)
cmlist_NR = selected_NR[['Latitude', 'Longitude']].values.tolist()

folium.GeoJson(borough_map).add_to(map_hooray_scatter)

# Illegal Parking: blue.
for point in cmlist:
    folium.CircleMarker(point, width=30, height=30, radius=3.5, weight=2.0, color='#0000CC',
                        fill_color='#0066FF', opacity=0.75, fill_opacity=0.5).add_to(map_hooray_scatter)

# Noise - Residential: red.
for point in cmlist_NR:
    folium.CircleMarker(point, width=30, height=30, radius=3.5, weight=2.0, color='#f00',
                        fill_color='#8e2929', opacity=0.75, fill_opacity=0.5).add_to(map_hooray_scatter)

map_hooray_scatter
Out[12]:
In [13]:
# plot all plot with five noise and 49 others
# For each of the five noise categories, draw one 7x7 figure of scatter plots
# (with a fitted regression line) against every other complaint type in dfT50.
for noise_type in dfNoise:
    f1, p1 = plt.subplots(7, 7, figsize=(40, 40))

    # The noise series does not change inside the inner loop, so filter it once.
    noise_counts = df.loc[df["Complaint Type"] == noise_type, 'monthSize']
    other_types = [c for c in dfT50['Complaint Type'] if c != noise_type]

    # zip stops at the last panel: if this noise type is not among the dfT50
    # rows there are 50 partners for 49 panels, and a manual row/column counter
    # would index past the grid.
    for ax, other_type in zip(p1.flat, other_types):
        other_counts = df.loc[df["Complaint Type"] == other_type, 'monthSize']

        # Trim both series to the shorter length so they can be paired.
        # NOTE(review): rows are paired by position, not by month; if either
        # type has missing months the pairs may be misaligned — confirm.
        n_obs = min(len(noise_counts), len(other_counts))
        x = noise_counts.head(n_obs)
        y = other_counts.head(n_obs)

        slope, intercept, r_value, p_value, std_err = stats.linregress(x, y)
        ax.scatter(x, y, marker='o')
        ax.plot(x, intercept + slope * x, 'r', label='slope: ' + str(round(slope, 2)))
        ax.set_xlabel(noise_type)
        ax.set_ylabel(other_type)
        ax.legend(loc="upper left")

    # File-system-safe figure name, used only by the optional save below.
    name = noise_type.replace('/', '-', 1)
#     f1.savefig('img/connection-'+name+'.png')
In [14]:
# Hand-picked partner complaint types for each noise category, listed in the
# same order as dfNoise:
#   nr -> 'Noise - Residential'      ns -> 'Noise - Street/Sidewalk'
#   nc -> 'Noise - Commercial'       nv -> 'Noise - Vehicle'
#   no -> 'Noise'
nr = [
    'Illegal Parking', 'Blocked Driveway', 'Noise', 'Sewer',
    'PAINT - PLASTER', 'Noise - Commercial',
]
ns = [
    'Noise - Residential', 'Illegal Parking', 'Blocked Driveway', 'HEATING',
    'Traffic Signal Condition', 'Damaged Tree', 'Rodent', 'Consumer Complaint',
    'New Tree Request', 'Overgrown Tree/Branches', 'Maintenance or Facility',
    'Elevator', 'Root/Sewer/Sidewalk Condition',
]
nc = [
    'Noise - Residential', 'Illegal Parking', 'Blocked Driveway', 'PLUMBING',
    'Water System', 'GENERAL CONSTRUCTION', 'Noise', 'Noise - Street/Sidewalk',
    'Taxi Complaint',
]
nv = [
    'Noise - Residential', 'HEAT/HOT WATER', 'Street Light Condition', 'HEATING',
    'Noise - Street/Sidewalk', 'UNSANITARY CONDITION', 'Traffic Signal Condition',
    'Sewer', 'Dirty Conditions', 'Sanitation Condition', 'Rodent', 'Building/Use',
    'Derelict Vehicles', 'Consumer Complaint', 'Graffiti', 'New Tree Request',
    'Overgrown Tree/Branches', 'Maintenance or Facility', 'Elevator',
    'Root/Sewer/Sidewalk Condition', 'Food Establishment',
]
no = [
    'Noise - Residential', 'HEATING', 'Noise - Street/Sidewalk',
    'Traffic Signal Condition', 'Dirty Conditions', 'New Tree Request',
    'Overgrown Tree/Branches', 'Root/Sewer/Sidewalk Condition', 'Illegal Parking',
    'Blocked Driveway', 'PLUMBING', 'General Construction/Plumbing',
    'Noise - Commercial', 'Broken Muni Meter', 'Taxi Complaint',
]

select = [nr, ns, nc, nv, no]

# Total number of (noise, partner) pairs across all five lists.
total = sum(len(partners) for partners in select)
In [15]:
# select all necessary plot
f1,p1 = plt.subplots(8, 8, figsize=(40,40))
k=0
n=0
big = 0
small = 0 
s1 = []
s2 = []
for kk in range(5):
    i = dfNoise[kk]
    cat =select[kk]
# for i in dfNoise:
    
    
    
    for j in cat:
        temp = df[df["Complaint Type"]==i]
        temp2 = df[df["Complaint Type"]==j]
        len1 = len(temp)
        len2 = len(temp2)
        if len1>len2:
            temp = temp.head(n = len2)
        else:
            temp2 = temp2.head(n = len1)
        slope, intercept, r_value, p_value, std_err = stats.linregress(temp['monthSize'],temp2['monthSize'])
        if slope > 0:
            big = big+1
            s1.append([i,j,slope])
        elif slope <= 0:
            small = small+1
            s2.append([i,j,slope])
        p1[n][k].scatter(temp['monthSize'],temp2['monthSize'],marker='o')
        p1[n][k].plot(temp['monthSize'], intercept + slope*temp['monthSize'], 'r',\
                      label='slope ~ R: '+str(round(slope, 2))+' ~ '+str(round(r_value, 2)))
        p1[n][k].set_xlabel(i)
        p1[n][k].set_ylabel(j)
        p1[n][k].legend(loc="upper left")
        if(k == 7):
            n=n+1
            k=0
        else:
            k=k+1
            
# name = i.replace('/', '-', 1)
#     f1.savefig('img/connection-'+name+'.png')
print("slope>0:"+str(big))
print("slope<=0:"+str(small))
slope>0:47
slope<=0:17
In [16]:
# All pairs in one list: positive-slope entries first, then the rest.
ss = [*s1, *s2]
print(len(ss))
# Spot-check a single [noise, partner, slope] entry.
ss[6]
64
Out[16]:
['Noise - Street/Sidewalk', 'Blocked Driveway', 0.4999629814283976]
In [17]:
# select all necessary plot and split by slope > 0 && slope < 0 
f1,p1 = plt.subplots(8, 8, figsize=(50,50))
k=0
n=0
big = 0
small = 0 
s1 = []
s2 = []
for kk in ss:
#     x
    i = kk[0]
#     y
    j = kk[1]
#     print(i)
#     print(j)
    
    temp = df[df["Complaint Type"]==i]
    
    
    temp2 = df[df["Complaint Type"]==j]
    len1 = len(temp)
    len2 = len(temp2)
    if len1>len2:
        temp = temp.head(n = len2)
    else:
        temp2 = temp2.head(n = len1)
    slope, intercept, r_value, p_value, std_err = stats.linregress(temp['monthSize'],temp2['monthSize'])
    p1[n][k].scatter(temp['monthSize'],temp2['monthSize'],marker='o')
    p1[n][k].plot(temp['monthSize'], intercept + slope*temp['monthSize'], 'r',label='slope: '+str(round(slope, 2)))
    p1[n][k].set_xlabel(i)
    p1[n][k].set_ylabel(j)
    p1[n][k].legend(loc="upper left")
    if(k == 7):
        n=n+1
        k=0
    else:
        k=k+1
In [18]:
# Noise categories for the tree comparison; the three entries are repeated so
# that each of the two tree complaint types gets the same set.
tree_noise = [
    'Noise - Street/Sidewalk', 'Noise - Vehicle', 'Noise',
    'Noise - Street/Sidewalk', 'Noise - Vehicle', 'Noise',
]
new_tree = 'New Tree Request'
over_tree = 'Overgrown Tree/Branches'
In [ ]:
 
In [ ]: